home *** CD-ROM | disk | FTP | other *** search
- #! /usr/local/bin/python
-
- class Markov:
- def __init__(self, histsize, choice):
- self.histsize = histsize
- self.choice = choice
- self.trans = {}
- def add(self, state, next):
- if not self.trans.has_key(state):
- self.trans[state] = [next]
- else:
- self.trans[state].append(next)
- def put(self, seq):
- n = self.histsize
- add = self.add
- add(None, seq[:0])
- for i in range(len(seq)):
- add(seq[max(0, i-n):i], seq[i:i+1])
- add(seq[len(seq)-n:], None)
- def get(self):
- choice = self.choice
- trans = self.trans
- n = self.histsize
- seq = choice(trans[None])
- while 1:
- subseq = seq[max(0, len(seq)-n):]
- options = trans[subseq]
- next = choice(options)
- if not next: break
- seq = seq + next
- return seq
-
- def test():
- import sys, string, whrandom, getopt
- args = sys.argv[1:]
- try:
- opts, args = getopt.getopt(args, '0123456789cdw')
- except getopt.error:
- print 'Usage: markov [-#] [-cddqw] [file] ...'
- print 'Options:'
- print '-#: 1-digit history size (default 2)'
- print '-c: characters (default)'
- print '-w: words'
- print '-d: more debugging output'
- print '-q: no debugging output'
- print 'Input files (default stdin) are split in paragraphs'
- print 'separated blank lines and each paragraph is split'
- print 'in words by whitespace, then reconcatenated with'
- print 'exactly one space separating words.'
- print 'Output consists of paragraphs separated by blank'
- print 'lines, where lines are no longer than 72 characters.'
- histsize = 2
- do_words = 0
- debug = 1
- for o, a in opts:
- if '-0' <= o <= '-9': histsize = eval(o[1:])
- if o == '-c': do_words = 0
- if o == '-d': debug = debug + 1
- if o == '-q': debug = 0
- if o == '-w': do_words = 1
- if not args: args = ['-']
- m = Markov(histsize, whrandom.choice)
- try:
- for filename in args:
- if filename == '-':
- f = sys.stdin
- if f.isatty():
- print 'Sorry, need stdin from file'
- continue
- else:
- f = open(filename, 'r')
- if debug: print 'processing', filename, '...'
- text = f.read()
- f.close()
- paralist = string.splitfields(text, '\n\n')
- for para in paralist:
- if debug > 1: print 'feeding ...'
- words = string.split(para)
- if words:
- if do_words: data = tuple(words)
- else: data = string.joinfields(words, ' ')
- m.put(data)
- except KeyboardInterrupt:
- print 'Interrupted -- continue with data read so far'
- if not m.trans:
- print 'No valid input files'
- return
- if debug: print 'done.'
- if debug > 1:
- for key in m.trans.keys():
- if key is None or len(key) < histsize:
- print `key`, m.trans[key]
- if histsize == 0: print `''`, m.trans['']
- print
- while 1:
- data = m.get()
- if do_words: words = data
- else: words = string.split(data)
- n = 0
- limit = 72
- for w in words:
- if n + len(w) > limit:
- print
- n = 0
- print w,
- n = n + len(w) + 1
- print
- print
-
- def tuple(list):
- if len(list) == 0: return ()
- if len(list) == 1: return (list[0],)
- i = len(list)/2
- return tuple(list[:i]) + tuple(list[i:])
-
- test()
-